May 2025

Drug Review SQL Analytics

Explored trends in drug effectiveness, satisfaction, and ease of use using SQL Window Functions

Project Overview

Analyzed a cleaned drug review dataset (mydb.drug_clean) to answer analytical questions using SQL Window Functions, Ranking, Aggregations, and Common Table Expressions.

Task

Client request: Analyze drug review data using SQL window functions and aggregations.

Problem Statements & SQL Solutions

1. On Label vs Off Label Satisfaction by Condition

SELECT condition, indication, ROUND(avg_satisfaction, 2) 
FROM (
SELECT d.Condition, d.Indication, 
    AVG(Satisfaction) OVER(PARTITION BY d.condition, d.indication) AS avg_satisfaction,
    ROW_NUMBER() OVER(PARTITION BY d.condition, d.indication) r
FROM drug_clean d
) t
WHERE r = 1;

2. Avg Ease of Use & Satisfaction for Expensive Drugs by Type

WITH cte AS (
SELECT *, ROW_NUMBER() OVER(ORDER BY price) AS row_num
FROM drug_clean
)
SELECT Type, ROUND(AVG(EaseOfUse), 2), ROUND(AVG(Satisfaction), 2)
FROM cte
WHERE price > (
SELECT price FROM cte 
WHERE row_num = (SELECT ROUND(COUNT(*)/2) FROM cte)
)
GROUP BY Type;

3. Cumulative Distribution of EaseOfUse by Drug Type

SELECT type, 
SUM(EaseOfUse) OVER(PARTITION BY type ORDER BY EaseOfUse ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum,
SUM(EaseOfUse) OVER(PARTITION BY type) AS total_sum,
SUM(EaseOfUse) OVER(PARTITION BY type ORDER BY EaseOfUse ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) /
SUM(EaseOfUse) OVER(PARTITION BY type) AS cumulative_distribution
FROM drug_clean;

4. Median Satisfaction by Condition

WITH cte AS (
SELECT *, ROW_NUMBER() OVER(PARTITION BY Condition ORDER BY Satisfaction ASC) AS row_num
FROM drug_clean
)
SELECT Condition, ROUND(Satisfaction, 2) AS Median_Satisfaction
FROM cte
WHERE row_num = (
SELECT ROUND(COUNT(*)/2) FROM cte c2 WHERE c2.Condition = cte.Condition
)
ORDER BY Median_Satisfaction DESC;

5. Running Average Price by Condition

SELECT condition,
ROUND(AVG(price) OVER(PARTITION BY condition ORDER BY price ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), 2) AS Running_Average
FROM drug_clean
ORDER BY condition;

6. Percent Change in Reviews for Each Drug

SELECT drug, reviews,
((reviews - LAG(reviews) OVER(PARTITION BY drug ORDER BY reviews DESC)) / 
    LAG(reviews) OVER(PARTITION BY drug ORDER BY reviews DESC)) * 100 AS percent_change_review
FROM drug_clean;

7. Satisfaction Contribution by Drug Type

SELECT *, ROUND((satisfaction_level / SUM(satisfaction_level) OVER()) * 100, 2) AS percentage_satisfaction
FROM (
SELECT type, SUM(satisfaction) AS satisfaction_level
FROM drug_clean
GROUP BY type
) t
ORDER BY percentage_satisfaction DESC, type;

8. Cumulative Effective Rating by Condition and Form

SELECT condition, drug, form, effective,
SUM(effective) OVER(PARTITION BY condition, form ORDER BY effective ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum
FROM drug_clean
ORDER BY condition, form, drug;

9. Rank of Avg Ease of Use by Drug Type

SELECT type,
AVG(EaseOfUse) AS avg_ease,
RANK() OVER(ORDER BY AVG(EaseOfUse) DESC) AS rank
FROM drug_clean
GROUP BY type
ORDER BY rank, type DESC;

10. Avg Effectiveness of Top 3 Most Reviewed Drugs per Condition

SELECT *
FROM (
SELECT condition, drug, reviews,
    AVG(effective) OVER(PARTITION BY condition, drug ORDER BY reviews DESC) AS avg_eff,
    DENSE_RANK() OVER(PARTITION BY condition ORDER BY reviews DESC) AS r
FROM drug_clean
) t
WHERE r < 4
ORDER BY condition, r;
                                
```